{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "sentiment_analysis_tutorial.ipynb",
      "provenance": [],
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/towardsai/tutorials/blob/master/sentiment_analysis_tutorial/sentiment_analysis_tutorial.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "WF4Qh5xVqM4D"
      },
      "source": [
        "# Sentiment Analysis on Women's Clothing Reviews\r\n",
        "\r\n",
        "* Tutorial: https://towardsai.net/p/nlp/sentiment-analysis-opinion-mining-with-python-nlp-tutorial-d1f173ca4e3c\r\n",
        "\r\n",
        "* GitHub: https://github.com/towardsai/tutorials/tree/master/sentiment_analysis_tutorial"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "cCdKoergjmDw"
      },
      "source": [
        "**Download Dataset**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "VLl8z7VEjpMw",
        "outputId": "f9c9a841-ef4d-4658-9445-30b7acf02068"
      },
      "source": [
        "# -nc (no-clobber): skip the download when the CSV is already present, so re-running this cell\n",
        "# does not save a second copy as women_clothing_review.csv.1 while read_csv keeps loading the old file.\n",
        "!wget -nc https://raw.githubusercontent.com/towardsai/tutorials/master/sentiment_analysis_tutorial/women_clothing_review.csv"
      ],
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "--2020-12-09 22:10:49--  https://raw.githubusercontent.com/towardsai/tutorials/master/sentiment_analysis_tutorial/women_clothing_review.csv\n",
            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n",
            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 8480236 (8.1M) [text/plain]\n",
            "Saving to: ‘women_clothing_review.csv’\n",
            "\n",
            "women_clothing_revi 100%[===================>]   8.09M  25.7MB/s    in 0.3s    \n",
            "\n",
            "2020-12-09 22:10:50 (25.7 MB/s) - ‘women_clothing_review.csv’ saved [8480236/8480236]\n",
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "-hBtFjjoqV2_"
      },
      "source": [
        "**Import All Required Packages**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "-sSRIdY1HZCS",
        "outputId": "ab15e23e-0924-42bb-c694-f4ddb58df4ef"
      },
      "source": [
        "import pandas as pd\n",
        "import numpy as np\n",
        "import seaborn as sns\n",
        "import re\n",
        "import string\n",
        "from string import punctuation\n",
        "import nltk\n",
        "from nltk.corpus import stopwords\n",
        "nltk.download('stopwords')\n",
        "\n",
        "\n",
        "import matplotlib.pyplot as plt\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.feature_extraction.text import CountVectorizer\n",
        "from sklearn.feature_extraction.text import TfidfTransformer\n",
        "\n",
        "import tensorflow as tf\n",
        "from tensorflow.keras.models import Sequential\n",
        "from tensorflow.keras.layers import Dense, Activation, Dropout\n",
        "from tensorflow.keras.callbacks import EarlyStopping"
      ],
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
            "[nltk_data]   Package stopwords is already up-to-date!\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "YUH7jv5KrgG-"
      },
      "source": [
        "**Read data from csv**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 306
        },
        "id": "aBdnYpaqqEfs",
        "outputId": "c4a417df-8a1e-42b2-de17-ed4e672cbbf3"
      },
      "source": [
        "# Load the downloaded reviews into a DataFrame and preview the first five rows.\n",
        "df = pd.read_csv(filepath_or_buffer='women_clothing_review.csv')\n",
        "df.head(n=5)"
      ],
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Unnamed: 0</th>\n",
              "      <th>Clothing ID</th>\n",
              "      <th>Age</th>\n",
              "      <th>Title</th>\n",
              "      <th>Review Text</th>\n",
              "      <th>Rating</th>\n",
              "      <th>Recommended IND</th>\n",
              "      <th>Positive Feedback Count</th>\n",
              "      <th>Division Name</th>\n",
              "      <th>Department Name</th>\n",
              "      <th>Class Name</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>0</td>\n",
              "      <td>767</td>\n",
              "      <td>33</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Absolutely wonderful - silky and sexy and comf...</td>\n",
              "      <td>4</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>Initmates</td>\n",
              "      <td>Intimate</td>\n",
              "      <td>Intimates</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1</td>\n",
              "      <td>1080</td>\n",
              "      <td>34</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Love this dress!  it's sooo pretty.  i happene...</td>\n",
              "      <td>5</td>\n",
              "      <td>1</td>\n",
              "      <td>4</td>\n",
              "      <td>General</td>\n",
              "      <td>Dresses</td>\n",
              "      <td>Dresses</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>2</td>\n",
              "      <td>1077</td>\n",
              "      <td>60</td>\n",
              "      <td>Some major design flaws</td>\n",
              "      <td>I had such high hopes for this dress and reall...</td>\n",
              "      <td>3</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>General</td>\n",
              "      <td>Dresses</td>\n",
              "      <td>Dresses</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>3</td>\n",
              "      <td>1049</td>\n",
              "      <td>50</td>\n",
              "      <td>My favorite buy!</td>\n",
              "      <td>I love, love, love this jumpsuit. it's fun, fl...</td>\n",
              "      <td>5</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>General Petite</td>\n",
              "      <td>Bottoms</td>\n",
              "      <td>Pants</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>4</td>\n",
              "      <td>847</td>\n",
              "      <td>47</td>\n",
              "      <td>Flattering shirt</td>\n",
              "      <td>This shirt is very flattering to all due to th...</td>\n",
              "      <td>5</td>\n",
              "      <td>1</td>\n",
              "      <td>6</td>\n",
              "      <td>General</td>\n",
              "      <td>Tops</td>\n",
              "      <td>Blouses</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   Unnamed: 0  Clothing ID  Age  ...   Division Name Department Name  Class Name\n",
              "0           0          767   33  ...       Initmates        Intimate   Intimates\n",
              "1           1         1080   34  ...         General         Dresses     Dresses\n",
              "2           2         1077   60  ...         General         Dresses     Dresses\n",
              "3           3         1049   50  ...  General Petite         Bottoms       Pants\n",
              "4           4          847   47  ...         General            Tops     Blouses\n",
              "\n",
              "[5 rows x 11 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 8
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "N1DXfvBurnSO"
      },
      "source": [
        "**Drop unnecessary columns**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Gn_vu_gprlK1"
      },
      "source": [
        "# Remove the columns this tutorial treats as unnecessary, then discard rows with any missing value.\n",
        "# errors='ignore' keeps the cell re-runnable: without it a second run raises KeyError because the\n",
        "# columns are already gone. Chaining .dropna() avoids mixing reassignment with inplace mutation.\n",
        "df = (\n",
        "    df.drop(columns=['Title', 'Positive Feedback Count', 'Unnamed: 0'], errors='ignore')\n",
        "      .dropna()\n",
        ")"
      ],
      "execution_count": 9,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "VqT2elfcs4tj"
      },
      "source": [
        "**Derive a polarity label (Positive / Neutral / Negative) from each rating**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "GY08-o4YzR1i"
      },
      "source": [
        "# Map each rating to a sentiment label: above 3 -> Positive, exactly 3 -> Neutral, otherwise Negative.\n",
        "df['Polarity_Rating'] = np.where(\n",
        "    df['Rating'] > 3,\n",
        "    'Positive',\n",
        "    np.where(df['Rating'] == 3, 'Neutral', 'Negative'),\n",
        ")"
      ],
      "execution_count": 10,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 202
        },
        "id": "6b0k-DA-0IYJ",
        "outputId": "623e6ff7-6699-4a3d-b08f-8f79bf2ce8d3"
      },
      "source": [
        "# Preview the frame now that the Polarity_Rating column has been added.\n",
        "df.head(n=5)"
      ],
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Clothing ID</th>\n",
              "      <th>Age</th>\n",
              "      <th>Review Text</th>\n",
              "      <th>Rating</th>\n",
              "      <th>Recommended IND</th>\n",
              "      <th>Division Name</th>\n",
              "      <th>Department Name</th>\n",
              "      <th>Class Name</th>\n",
              "      <th>Polarity_Rating</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>767</td>\n",
              "      <td>33</td>\n",
              "      <td>Absolutely wonderful - silky and sexy and comf...</td>\n",
              "      <td>4</td>\n",
              "      <td>1</td>\n",
              "      <td>Initmates</td>\n",
              "      <td>Intimate</td>\n",
              "      <td>Intimates</td>\n",
              "      <td>Positive</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1080</td>\n",
              "      <td>34</td>\n",
              "      <td>Love this dress!  it's sooo pretty.  i happene...</td>\n",
              "      <td>5</td>\n",
              "      <td>1</td>\n",
              "      <td>General</td>\n",
              "      <td>Dresses</td>\n",
              "      <td>Dresses</td>\n",
              "      <td>Positive</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>1077</td>\n",
              "      <td>60</td>\n",
              "      <td>I had such high hopes for this dress and reall...</td>\n",
              "      <td>3</td>\n",
              "      <td>0</td>\n",
              "      <td>General</td>\n",
              "      <td>Dresses</td>\n",
              "      <td>Dresses</td>\n",
              "      <td>Neutral</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>1049</td>\n",
              "      <td>50</td>\n",
              "      <td>I love, love, love this jumpsuit. it's fun, fl...</td>\n",
              "      <td>5</td>\n",
              "      <td>1</td>\n",
              "      <td>General Petite</td>\n",
              "      <td>Bottoms</td>\n",
              "      <td>Pants</td>\n",
              "      <td>Positive</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>847</td>\n",
              "      <td>47</td>\n",
              "      <td>This shirt is very flattering to all due to th...</td>\n",
              "      <td>5</td>\n",
              "      <td>1</td>\n",
              "      <td>General</td>\n",
              "      <td>Tops</td>\n",
              "      <td>Blouses</td>\n",
              "      <td>Positive</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   Clothing ID  Age  ... Class Name  Polarity_Rating\n",
              "0          767   33  ...  Intimates         Positive\n",
              "1         1080   34  ...    Dresses         Positive\n",
              "2         1077   60  ...    Dresses          Neutral\n",
              "3         1049   50  ...      Pants         Positive\n",
              "4          847   47  ...    Blouses         Positive\n",
              "\n",
              "[5 rows x 9 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 11
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "lMD-lue81NME"
      },
      "source": [
        "**Plot the distribution of ratings**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 297
        },
        "id": "EwQ5_FWh1R-p",
        "outputId": "1db2e56f-a522-400a-8edf-235f14cd73dd"
      },
      "source": [
        "# Bar chart of how many reviews carry each Rating value.\n",
        "# The title lets the figure stand alone; the trailing ';' keeps the Axes repr out of the cell output.\n",
        "sns.set_style('whitegrid')\n",
        "sns.countplot(x='Rating', data=df, palette='YlGnBu_r').set(title='Review count by rating');"
      ],
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<matplotlib.axes._subplots.AxesSubplot at 0x7efe0bb5e240>"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 12
        },
        {
          "output_type": "display_data",
          "data": {
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZEAAAEGCAYAAACkQqisAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAb2UlEQVR4nO3de3BU9d3H8feSNJEhN4PJrlSGTgDbiJLYpzbEpKEubhISUgIk7UyrA2kZdWBII5pKtEbkZnVQqZOpkmZGsaNWQZNMs4wEl0uSCsVHTDPYHTupk05w2I3EkACa27LPH3lYy2BsOLJ7CPm8/kp+ey7f3xrOx9+5/I7F7/f7ERERMWCS2QWIiMj4pRARERHDFCIiImKYQkRERAxTiIiIiGHhZhcQaq2trURGRppdhojIuDIwMEBqaupF7RMuRCIjI0lOTja7DBGRccXtdn9lu05niYiIYQoRERExTCEiIiKGKURERMQwhYiIiBimEBEREcMUIiIiYphCREREDFOIiIiIYQoREZFLdM7vM7uEoDDSrwk37YmIyDc1yRLGxyf3ml3GZZd0neOS19FIREREDFOIiIiIYQoRERExLGghUlFRQXp6OosWLQq0Pfnkk+Tm5lJQUMDq1avp6+sLfLZ9+3YcDgc5OTk0NzcH2puamsjJycHhcFBdXR1o7+zspLi4GIfDQVlZGYODg8HqioiIjCJoIbJ06VJqamouaMvIyKChoYG//OUvfOc732H79u0AtLe343Q6cTqd1NTU8Pjjj+Pz+fD5fGzYsIGamhqcTicNDQ20t7cDsHXrVlasWMHevXuJiYlh165dweqKiIiMImghcttttxEbG3tBW2ZmJuHhIzeEpaam4vF4AHC5XOTn5xMREcH06dOZMWMGbW1ttLW1MWPGDKZPn05ERAT5+fm4XC78fj+HDx8mJycHgCVLluByuYLVFRERGYVpt/i++eabLFy4EACv10tKSkrgM6vVitfrBcBms13Q3tbWRk9PDzExMYFAstlsgeX/m4GBgVHf0CUiMhZX89tRL/X4aEqIPP/884SFhfGTn/wk5PvW63FFREY32vFxtHAJeYi89dZbHDhwgJdeegmLxQKMjDDOn9qCkZGJ1WoF+Mr2a6+9lr6+PoaHhwkPD8fj8QSWFxGR0AnpLb5NTU3U1NTw/PPPM3ny5EC73W7H6XQyODhIZ2cnHR0dzJ07l1tuuYWOjg46OzsZHBzE6XRit9uxWCykpaWxZ88eAGpra7Hb7aHsioiIEMSRyNq1azly5Ag9PT1kZWWxZs0aqqurGRwcpKSkBICUlBQ2bNjA7NmzWbhwIXl5eYSFhVFZWUlYWBgAlZWVrFy5Ep/Px7Jly5g9ezYA5eXl3H///Wzbto3k5GSKi4uD1RURERmFxe/3+80uIpTcbreuiYjINzbR5s4a7dipJ9ZFRMQwhYiIiBimEBEREcMUIiIiYphCREREDFOIiIiIYQoRERExTCEiIiKGKURERMQwhYiIiBimEBEREcMUIiIiYphCREREDFOIiIiIYQoRERExTCEiIiKGKURERMQwhYiIiBimEBEREcMUIiIiYphCREREDFOIiIiIYQoRERExTCEiIiKGBS1EKioqSE9PZ9GiRYG2U6dOUVJSQnZ2NiUlJfT29gLg9/vZtGkTDoeDgoICPvzww8A6tbW1ZGdnk52dTW1tbaD92LFjFBQU4HA42LRpE36/P1hdERGRUQQtRJYuXUpNTc0FbdXV1aSnp9PY2Eh6ejrV1dUANDU10dHRQWNjIxs3bmT9+vXASOhUVVXxxhtvsHPnTqqqqgLBs379ejZu3EhjYyMdHR00NTUFqysiIjKKoIXIbbfdRmxs7AVtLpeLwsJCAAoLC3nnnXcuaLdYLKSmptLX10dXVxctLS1kZGQQFxdHbGwsGRkZNDc309XVxZkzZ0hNTcVisVBYWI
jL5QpWV0REZBThodxZd3c3iYmJACQkJNDd3Q2A1+vFZrMFlrPZbHi93ovarVbrV7afX34sBgYGcLvdl6M7IjJBJScnm11C0Fzq8TGkIfKfLBYLFosl5PuNjIy8qv8ARES+idGOj6OFS0jvzpo6dSpdXV0AdHV1ER8fD4yMMDweT2A5j8eD1Wq9qN3r9X5l+/nlRUQktEIaIna7nbq6OgDq6upYsGDBBe1+v5/W1laio6NJTEwkMzOTlpYWent76e3tpaWlhczMTBITE4mKiqK1tRW/33/BtkREJHSCdjpr7dq1HDlyhJ6eHrKyslizZg333HMPZWVl7Nq1i2nTprFt2zYA5s+fz8GDB3E4HEyePJktW7YAEBcXx6pVqygqKgJg9erVxMXFAfDYY49RUVFBf38/WVlZZGVlBasrIiIyCot/gj1g4Xa7dU1ERL6xj0/uNbuEyy7pOseon4127NQT6yIiYphCREREDFOIiIiIYQoRERExTCEiIiKGKURERMQwhYiIiBimEBEREcMUIiIiYphCREREDFOIiIiIYQoRERExTCEiIiKGKURERMQwhYiIiBimEBEREcMUIiIiYphCREREDFOIiIiIYQoRERExTCEiIiKGKURERMQwhYiIiBimEBEREcNMCZGXXnqJ/Px8Fi1axNq1axkYGKCzs5Pi4mIcDgdlZWUMDg4CMDg4SFlZGQ6Hg+LiYo4fPx7Yzvbt23E4HOTk5NDc3GxGV0REJrSQh4jX6+Xll1/mzTffpKGhAZ/Ph9PpZOvWraxYsYK9e/cSExPDrl27ANi5cycxMTHs3buXFStWsHXrVgDa29txOp04nU5qamp4/PHH8fl8oe6OiMiEZspIxOfz0d/fz/DwMP39/SQkJHD48GFycnIAWLJkCS6XC4B9+/axZMkSAHJycjh06BB+vx+Xy0V+fj4RERFMnz6dGTNm0NbWZkZ3REQmrJCHiNVq5Ze//CV33HEHmZmZREVFMWfOHGJiYggPDwfAZrPh9XqBkZHL9ddfD0B4eDjR0dH09PTg9Xqx2WwXbPf8OiIiEhrhod5hb28vLpcLl8tFdHQ0v/71r0N6PWNgYAC32x2y/YnI1Sc5OdnsEoLmUo+PIQ+Rd999lxtuuIH4+HgAsrOzOXr0KH19fQwPDxMeHo7H48FqtQIjI4wTJ05gs9kYHh7m9OnTXHvttVitVjweT2C7Xq83sM7XiYyMvKr/AEREvonRjo+jhUvIT2dNmzaNv//973zxxRf4/X4OHTrErFmzSEtLY8+ePQDU1tZit9sBsNvt1NbWArBnzx7mzZuHxWLBbrfjdDoZHByks7OTjo4O5s6dG+ruiIhMaCEfiaSkpJCTk8OSJUsIDw8nOTmZn/3sZ/z4xz/m/vvvZ9u2bSQnJ1NcXAxAUVER5eXlOBwOYmNjefbZZwGYPXs2CxcuJC8vj7CwMCorKwkLCwt1d0REJjSL3+/3m11EKLndbp3OEpFv7OOTe80u4bJLus4x6mejHTv1xLqIiBimEBEREcMUIiIiYphCREREDFOIiIiIYQoRERExTCEiIiKGjSlEli9fPqY2ERGZWL72ifWBgQG++OILenp66O3t5fxziWfOnNGMuSIi8vUh8uc//5kdO3bQ1dXF0qVLAyESFRXFXXfdFZICRUTkyvW1IbJ8+XKWL1/On/70J+6+++5Q1SQiIuPEmCZgvPvuuzl69CiffPLJBa+gLSwsDFphIiJy5RtTiJSXl9PZ2cn3vve9wEy5FotFISIiMsGNKUSOHTvG7t27sVgswa5HRETGkTHd4jt79mw+/fTTYNciIiLjzJhGIj09PeTn5zN37ly+9a1vBdpfeOGFoBUmIiJXvjGFyJo1a4Jdh4iIjENjCpEf/vCHwa5DRETGoTGFyK233hq4qD40NMTw8DCTJ0/m6NGjQS1ORESubGMKkQ8++CDws9/vx+Vy0draGrSiRERkfLjkWXwtFgt33nknLS0twahHRETGkT
GNRBobGwM/nzt3jmPHjhEZGRm0okREZHwYU4js378/8HNYWBjf/va3+cMf/hC0okREZHwYU4g88cQTwa5DRETGoTFdE/F4PKxevZr09HTS09NZs2YNHo/H8E77+vooLS0lNzeXhQsX8sEHH3Dq1ClKSkrIzs6mpKSE3t5eYORC/qZNm3A4HBQUFPDhhx8GtlNbW0t2djbZ2dnU1tYarkdERIwZU4hUVFRgt9tpbm6mubmZO+64g4qKCsM73bx5Mz/60Y94++23qa+vZ+bMmVRXV5Oenk5jYyPp6elUV1cD0NTUREdHB42NjWzcuJH169cDcOrUKaqqqnjjjTfYuXMnVVVVgeAREZHQGFOIfPbZZyxbtozw8HDCw8NZunQpn332maEdnj59mvfee4+ioiIAIiIiiImJweVyBWYFLiws5J133gEItFssFlJTU+nr66Orq4uWlhYyMjKIi4sjNjaWjIwMmpubDdUkIiLGjClE4uLiqK+vx+fz4fP5qK+vJy4uztAOjx8/Tnx8PBUVFRQWFvLII4/w+eef093dTWJiIgAJCQl0d3cD4PV6sdlsgfVtNhter/eidqvVqlf2ioiE2JgurG/ZsoWNGzfyxBNPYLFYuPXWW/nd735naIfDw8P84x//4NFHHyUlJYVNmzYFTl2dZ7FYgjbt/MDAAG63OyjbFpGJITk52ewSguZSj49jCpHnnnuOJ598ktjYWGDkesSTTz5p6K4tm82GzWYjJSUFgNzcXKqrq5k6dSpdXV0kJibS1dVFfHw8MDLC+M+L+B6PB6vVitVq5ciRI4F2r9c7pjm+IiMjr+o/ABGRb2K04+No4TKm01kfffRRIEBg5PSW0f+bT0hIwGaz8fHHHwNw6NAhZs6cid1up66uDoC6ujoWLFgAEGj3+/20trYSHR1NYmIimZmZtLS00NvbS29vLy0tLWRmZhqqSUREjBnTSOTcuXP09vZeMBL5z3etX6pHH32UBx98kKGhIaZPn84TTzzBuXPnKCsrY9euXUybNo1t27YBMH/+fA4ePIjD4WDy5Mls2bIFGAmyVatWBS7Qr1692vB1GhERMcbi9/v9/22huro6XnjhBXJzcwF4++23ue+++8blO9bdbrdOZ4nIN/bxyb1ml3DZJV3nGPWz0Y6dYxqJFBYWcvPNN3P48GEAqqqqmDVrlsEyRUTkajGmEAGYNWuWgkNERC5wyVPBi4iInKcQERERwxQiIiJimEJEREQMU4iIiIhhChERETFMISIiIoYpRERExDCFiIiIGKYQERERwxQiIiJimEJERMZk+Jzx1z9cya7WfoXKmCdgFJGJLXxSGC99sMfsMi67FbfmmF3CuKaRiIiIGKYQERERwxQiIiJimEJEREQMU4iIiIhhChERETFMISIiIoYpRERExDCFiIiIGKYQERERw0wLEZ/PR2FhIffeey8AnZ2dFBcX43A4KCsrY3BwEIDBwUHKyspwOBwUFxdz/PjxwDa2b9+Ow+EgJyeH5uZmU/ohIjKRmRYiL7/8MjNnzgz8vnXrVlasWMHevXuJiYlh165dAOzcuZOYmBj27t3LihUr2Lp1KwDt7e04nU6cTic1NTU8/vjj+HyaSE1EJJRMCRGPx8OBAwcoKioCwO/3c/jwYXJyRiZCW7JkCS6XC4B9+/axZMkSAHJycjh06BB+vx+Xy0V+fj4RERFMnz6dGTNm0NbWZkZ3REQmLFNm8d2yZQvl5eWcPXsWgJ6eHmJiYggPHynHZrPh9XoB8Hq9XH/99SPFhocTHR1NT08PXq+XlJSUwDatVmtgna8zMDCA2+2+3F0SueolJyebXULQXOoxQd/Fl0IeIvv37yc+Pp6bb76Zv/3tb6HePZGRkVf1H4CIXDodE7402ncxWriEPESOHj3Kvn37aGpqYmBggDNnzrB582b6+voYHh4mPDwcj8eD1WoFRkYYJ06cwGazMTw8zOnTp7n22muxWq14PJ7Adr1eb2AdEREJjZ
BfE3nggQdoampi3759PPPMM8ybN4+nn36atLQ09uwZeeFNbW0tdrsdALvdTm1tLQB79uxh3rx5WCwW7HY7TqeTwcFBOjs76ejoYO7cuaHujojIhHbFPCdSXl7Oiy++iMPh4NSpUxQXFwNQVFTEqVOncDgcvPjiizz44IMAzJ49m4ULF5KXl8fKlSuprKwkLCzMzC6IiEw4Fr/f7ze7iFByu906/ylikF6P+6WPT+69zJWYL+k6x6ifjXbsvGJGIiIiMv4oRERExDCFiIiIGKYQERERwxQiIiJimEJEREQMU4iIiIhhChERETFMISIiIoYpRERExDCFiIiIGKYQERERwxQiIiJimEJE5GsM+nxmlxAUV2u/JPRMece6yHgRERbGsjecZpdx2b3503yzS5CrhEYiIiJimEJEREQMU4iIiIhhChERETFMISIiIoYpRERExDCFiIiIGKYQERERwxQiIiJiWMhD5MSJE9x9993k5eWRn5/Pjh07ADh16hQlJSVkZ2dTUlJCb28vAH6/n02bNuFwOCgoKODDDz8MbKu2tpbs7Gyys7Opra0NdVdERCa8kIdIWFgY69atY/fu3bz++uu8+uqrtLe3U11dTXp6Oo2NjaSnp1NdXQ1AU1MTHR0dNDY2snHjRtavXw+MhE5VVRVvvPEGO3fupKqqKhA8IiISGiEPkcTERObMmQNAVFQUSUlJeL1eXC4XhYWFABQWFvLOO+8ABNotFgupqan09fXR1dVFS0sLGRkZxMXFERsbS0ZGBs3NzaHujojIhGbqBIzHjx/H7XaTkpJCd3c3iYmJACQkJNDd3Q2A1+vFZrMF1rHZbHi93ovarVYrXq/3v+5zYGAAt9t9mXsiV6vk5GSzSwiaS/13oO/iS/ouvmRaiJw9e5bS0lIefvhhoqKiLvjMYrFgsViCst/IyMir+g9AZKz07+BL+i6+NNp3MVq4mHJ31tDQEKWlpRQUFJCdnQ3A1KlT6erqAqCrq4v4+HhgZITh8XgC63o8HqxW60XtXq8Xq9Uawl6IiEjIQ8Tv9/PII4+QlJRESUlJoN1ut1NXVwdAXV0dCxYsuKDd7/fT2tpKdHQ0iYmJZGZm0tLSQm9vL729vbS0tJCZmRnq7oiITGghP531/vvvU19fz4033sjixYsBWLt2Lffccw9lZWXs2rWLadOmsW3bNgDmz5/PwYMHcTgcTJ48mS1btgAQFxfHqlWrKCoqAmD16tXExcWFujsiIhNayEPkBz/4AR999NFXfnb+mZH/ZLFYeOyxx75y+aKiokCIiIhI6OmJdRERMUwhIhcZGPKZXUJQXK39EjGTqc+JyJUp8lth/M9vXzW7jMvu/U0/N7sEkauORiIiImKYQkRERAxTiIiIiGEKERERMUwhIiIihilERETEMIWIiIgYphARERHDFCIiImKYQkRERAxTiPy/gcFhs0sIiqu1XyJyZdDcWf8vMiKcH+Q9bXYZl93/7n7A7BJE5CqmkYiIiBimEBEREcMUIiIiYphCREREDFOIiIiIYQoRERExTCEiIiKGKURERMQwhYiIiBg27kOkqamJnJwcHA4H1dXVZpcjIjKhjOsQ8fl8bNiwgZqaGpxOJw0NDbS3t5tdlojIhDGuQ6StrY0ZM2Ywffp0IiIiyM/Px+VymV2WiMiEYfH7/X6zizDq7bffprm5mc2bNwNQV1dHW1sblZWVo67T2tpKZGRkqEoUEbkqDAwMkJqaelH7hJvF96u+BBERMWZcn86yWq14PJ7A716vF6vVamJFIiITy7gOkVtuuYWOjg46OzsZHBzE6XRit9vNLktEZMIY16ezwsPDqaysZOXKlfh8PpYtW8bs2bPNLktEZMIY1xfWRUTEXOP6dJaIiJhLISIiIoaN62si401FRQUHDhxg6tSpNDQ0mF2OqU6cOMFvfvMburu7sVgs/PSnP2X58uVml2WKgYEBfvGLXzA4OIjP5yMnJ4fS0lKzyzLN+eubVquV7d
u3m12Oqex2O1OmTGHSpEmEhYXx1ltvmV3SRRQiIbR06VLuuusuHnroIbNLMV1YWBjr1q1jzpw5nDlzhmXLlpGRkcGsWbPMLi3kIiIi2LFjB1OmTGFoaIif//znZGVlTdhnml5++WVmzpzJmTNnzC7lirBjxw7i4+PNLmNUOp0VQrfddhuxsbFml3FFSExMZM6cOQBERUWRlJSE1+s1uSpzWCwWpkyZAsDw8DDDw8NYLBaTqzKHx+PhwIEDFBUVmV2KjJFCREx3/Phx3G43KSkpZpdiGp/Px+LFi7n99tu5/fbbJ+x3sWXLFsrLy5k0SYem8371q1+xdOlSXn/9dbNL+Ur6LyWmOnv2LKWlpTz88MNERUWZXY5pwsLCqK+v5+DBg7S1tfHPf/7T7JJCbv/+/cTHx3PzzTebXcoV47XXXqO2tpY//vGPvPLKK7z33ntml3QRhYiYZmhoiNLSUgoKCsjOzja7nCtCTEwMaWlpNDc3m11KyB09epR9+/Zht9tZu3Ythw8f5sEHHzS7LFOdn8Zp6tSpOBwO2traTK7oYgoRMYXf7+eRRx4hKSmJkpISs8sx1WeffUZfXx8A/f39vPvuuyQlJZlcVeg98MADNDU1sW/fPp555hnmzZvH1q1bzS7LNJ9//nng5oLPP/+cv/71r1fkjBy6OyuE1q5dy5EjR+jp6SErK4s1a9ZQXFxsdlmmeP/996mvr+fGG29k8eLFwMj3M3/+fJMrC72uri7WrVuHz+fD7/eTm5vLHXfcYXZZYrLu7m5Wr14NjFwzW7RoEVlZWSZXdTFNeyIiIobpdJaIiBimEBEREcMUIiIiYphCREREDFOIiIiIYbrFV+QySk5O5sYbb8Tn83HDDTfw1FNPERMTM+rybrebrq6uwK3NLpeLf/3rX9xzzz2hKlnkG9FIROQyuuaaa6ivr6ehoYHY2FheeeWVr13e7XZz8ODBwO8LFixQgMi4opGISJCkpqby0UcfAdDW1sbmzZsZGBjgmmuuYcuWLdxwww0899xz9Pf38/7773PvvffS39/PsWPHqKysZN26dURFRXHs2DE+/fRTysvLyc3N5dy5c2zYsIHDhw9z/fXXEx4ezrJly8jNzTW5xzIRaSQiEgQ+n49Dhw5ht9sBSEpK4pVXXqGuro7S0lKeffZZIiIiKC0tJS8vj/r6evLy8i7aTldXF6+++irbt2/n6aefBqCxsZFPPvmE3bt389RTT9Ha2hrSvon8J41ERC6j/v5+Fi9ejNfrZebMmWRkZABw+vRpHnroIf79739jsVgYGhoa0/buvPNOJk2axKxZszh58iQwMmVMbm4ukyZNIiEhgbS0tKD1R+S/0UhE5DI6f01k//79+P3+wDWR3//+96SlpdHQ0MDzzz/P4ODgmLYXERERzHJFvjGFiEgQTJ48md/+9re8+OKLDA8Pc/r06cC03rW1tYHlpkyZwtmzZy9p29///vdpbGzk3LlznDx5kiNHjlzW2kUuhUJEJEhuuukmvvvd79LQ0MDKlSt55plnKCwsZHh4OLBMWloa7e3tLF68mN27d49puzk5OVitVvLy8igvL+emm24iOjo6WN0Q+VqaxVdkHDp79ixTpkyhp6eH4uJiXnvtNRISEswuSyYgXVgXGYfuu+8++vr6GBoaYtWqVQoQMY1GIiIiYpiuiYiIiGEKERERMUwhIiIihilERETEMIWIiIgY9n8hNLqkZIbSLgAAAABJRU5ErkJggg==\n",
            "text/plain": [
              "<Figure size 432x288 with 1 Axes>"
            ]
          },
          "metadata": {
            "tags": []
          }
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "cObGcj-R1_Uf"
      },
      "source": [
        "**Plot the Polarity Rating count**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 297
        },
        "id": "TglKEwxS2DcK",
        "outputId": "32a9bc27-7a55-450e-c1fd-e3380a0af5d1"
      },
      "source": [
        "# Bar chart of how many reviews fall into each Polarity_Rating label.\n",
        "# The title lets the figure stand alone; the trailing ';' keeps the Axes repr out of the cell output.\n",
        "sns.set_style('whitegrid')\n",
        "sns.countplot(x='Polarity_Rating', data=df, palette='summer').set(title='Review count by polarity');"
      ],
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<matplotlib.axes._subplots.AxesSubplot at 0x7efe0bb424a8>"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 13
        },
        {
          "output_type": "display_data",
          "data": {
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZEAAAEHCAYAAABvHnsJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3df1RVdb7/8ecBBE0K/HUOSyMblzRx8wc11gxXxesxUDkgBJJja0oZ/XpVRgftjlesQUUtnfI7pjYmi7lzu423MgahPJbkmQLN0n7I12you+xKYnc4zEJAKQXF/f3Dr+cbo9hh5+FAvB5rtRb7c/aP94dPntfZe5/9wWIYhoGIiIgJAf4uQEREui+FiIiImKYQERER0xQiIiJimkJERERMC/J3AZ2toqKCkJAQf5chItKtNDc3ExMTc1V7jwuRkJAQoqOj/V2GiEi3UllZec12Xc4SERHTFCIiImKaQkRERExTiIiIiGkKERERMc1n387Kycnh7bffZsCAAezevRuA7OxsTpw4AcDZs2e5+eabKSkp4dSpUyQmJvKDH/wAgNGjR5OXlwfAsWPHyMnJ4fz580yYMIHHHnsMi8VCQ0MDS5Ys4csvv2TIkCFs2rSJsLAwX3VHRESuwWdnImlpaRQUFLRp27RpEyUlJZSUlJCQkEB8fLzntdtuu83z2pUAAVi1ahVr1qyhtLSUqqoqysvLAcjPzyc2NpbS0lJiY2PJz8/3VVdERKQdPguRe++9t90zA8MweP3110lKSrruPmpra2lqaiImJgaLxUJqaioulwsAl8tFamoqAKmpqezbt+/GdkBERL6VXx42/OCDDxgwYAC33367p+3UqVOkpqYSGhpKdnY2Y8aMwe12ExER4VknIiICt9sNQF1dHVarFYBBgwZRV1fn1bGbm5vbfWhGREQ6xi8hsnv37jZnIVarlbfeeot+/fpx7NgxsrKycDqdXu/PYrFgsVi8WrcjT6y3XLxAcFAvr+uQjtPvWKR7aO/Dd6eHyMWLF3nzzTcpKirytAUHBxMcHAzAiBEjuO222zhx4gQ2m42amhrPejU1NdhsNgAGDBhAbW0tVquV2tpa+vfvf8NrDQ7qRfKzC274fuX/ey1rm79LEJHvoNO/4nvw4EGGDRvW5jLV6dOnaW1tBaC6upqqqioiIyOxWq2EhoZSUVGBYRgUFxczadIkAOx2O8XFxQBt2kVEpPP47Exk6dKlHD58mPr6euLi4li0aBEZGRns2bMHh8PRZt3333+fzZs3ExQUREBAAKtXryY8PByAlStXer7iGxcXR1xcHADz5s0jOzubwsJCBg8ezKZNm3zVFRERaYfFMAzD30V0psrKyg7N4qvLWb6ly1ki3UN77516Yl1ERExTiIiIiGkKERERMU0hIiIipilERETENIWIiIiYphARERHTFCIiImKaQkRERExTiIiIiGkKERERMU0hIiIipilERETENIWIiIiYphARERHTFCIiImKaQkRERExTiIiIiGkKERERMU0hIiIipvksRHJycoiNjSUpKcnTtmXLFsaPH09KSgopKSmUlZV5Xtu+fTvx8fFMnjyZ/fv3e9rLy8uZPHky8fHx5Ofne9qrq6vJyMggPj6e7OxsWlpafNUVERFph89CJC0tjYKCgqvaZ8+eTUlJCSUlJUyYMAGA48eP43Q6cTqdFBQUsHr1alpbW2ltbSUvL4+CggKcTie7d+/m+PHjADz99NPMnj2bN998k1tuuYXCwkJfdUVERNrhsxC59957CQsL82pdl8uFw+EgODiYyMhIhg4dytGjRzl69ChDhw4lMjKS4OBgHA4HLpcLwzB47733mDx5MgAPPPAALpfLV10REZF2dPo9kR07dpCcnExOTg6NjY0AuN1uIiIiPOvYbDbcbne77fX19dxyyy0EBQUBEBERgdvt7tyOiIgIQZ15sJkzZ7
Jw4UIsFgvPPPMM69ev58knn+zMEmhubqaystKrdaOjo31cjQBej4eIdD2dGiIDBw70/JyRkcH8+fOBy2cYNTU1ntfcbjc2mw3gmu39+vXjzJkzXLx4kaCgIGpqajzrf5uQkBCFQxej8RDp+tr7sNepl7Nqa2s9P+/bt4+oqCgA7HY7TqeTlpYWqqurqaqqYtSoUYwcOZKqqiqqq6tpaWnB6XRit9uxWCz8+Mc/Zu/evQDs2rULu93emV0RERF8eCaydOlSDh8+TH19PXFxcSxatIjDhw/z6aefAjBkyBDy8vIAiIqKYurUqSQmJhIYGEhubi6BgYEA5ObmMnfuXFpbW0lPT/cEz69+9SuWLFnCpk2biI6OJiMjw1ddERGRdlgMwzD8XURnqqys7NDlk+RnF/iwGnkta5u/SxARL7T33qkn1kVExDSFiIiImKYQERER0xQiIiJimkJERERMU4iIiIhpChERETFNISIiIqYpRERExDSFiIiImKYQERER0xQiIiJimkJERERMU4iIiIhpChERETFNISIiIqYpRERExDSFiIiImKYQERER0xQiIiJimkJERERM81mI5OTkEBsbS1JSkqdtw4YNTJkyheTkZLKysjhz5gwAp06dYtSoUaSkpJCSkkJubq5nm2PHjpGcnEx8fDxr167FMAwAGhoayMzMJCEhgczMTBobG33VFRERaYfPQiQtLY2CgoI2bWPHjmX37t289tpr3H777Wzfvt3z2m233UZJSQklJSXk5eV52letWsWaNWsoLS2lqqqK8vJyAPLz84mNjaW0tJTY2Fjy8/N91RUREWmHz0Lk3nvvJSwsrE3buHHjCAoKAiAmJoaamprr7qO2tpampiZiYmKwWCykpqbicrkAcLlcpKamApCamsq+fft80AsREbmeIH8d+E9/+hNTp071LJ86dYrU1FRCQ0PJzs5mzJgxuN1uIiIiPOtERETgdrsBqKurw2q1AjBo0CDq6uq8Om5zczOVlZVerRsdHe1td+Q78HY8RKTr8UuIbNu2jcDAQKZNmwaA1Wrlrbfeol+/fhw7doysrCycTqfX+7NYLFgsFq/WDQkJUTh0MRoPka6vvQ97nR4iRUVFvP322/z7v/+7540/ODiY4OBgAEaMGMFtt93GiRMnsNlsbS551dTUYLPZABgwYAC1tbVYrVZqa2vp379/Z3dFRKTH69Sv+JaXl1NQUMC2bdvo06ePp/306dO0trYCUF1dTVVVFZGRkVitVkJDQ6moqMAwDIqLi5k0aRIAdrud4uJigDbtIiLSeXx2JrJ06VIOHz5MfX09cXFxLFq0iPz8fFpaWsjMzARg9OjR5OXl8f7777N582aCgoIICAhg9erVhIeHA7By5UpycnI4f/48cXFxxMXFATBv3jyys7MpLCxk8ODBbNq0yVddERGRdliMKw9e9BCVlZUdugaf/OwCH1Yjr2Vt83cJIuKF9t479cS6iIiYphARERHTFCIiImKaQkRERExTiIiIiGkKERERMU0hIiIipilERETENIWIiIiYphARERHTFCIiImKaQkRERExTiIiIiGkKERERMU0hIiIipilERETENIWIiIiYphARERHTFCIiImKaQkRERExTiIiIiGk+DZGcnBxiY2NJSkrytDU0NJCZmUlCQgKZmZk0NjYCYBgGa9euJT4+nuTkZD755BPPNrt27SIhIYGEhAR27drlaT927BjJycnEx8ezdu1aDMPwZXdEROTv+DRE0tLSKCgoaNOWn59PbGwspaWlxMbGkp+fD0B5eTlVVVWUlpayZs0aVq1aBVwOna1bt7Jz505eeeUVtm7d6gmeVatWsWbNGkpLS6mqqqK8vNyX3RERkb/jVYjMmjXLq7a/d++99xIWFtamzeVykZqaCkBqair79u1r026xWIiJieHMmTPU1tZy4MABxo4dS3h4OGFhYYwdO5b9+/dTW1tLU1MTMTExWCwWUlNTcblc3nRHRERukK
Drvdjc3My5c+eor6+nsbHRc7moqakJt9tt6oB1dXVYrVYABg0aRF1dHQBut5uIiAjPehEREbjd7qvabTbbNduvrP9tmpubqays9KrW6Ohor9aT78bb8RCRrue6IfLSSy/x/PPPU1tbS1pamidEQkND+dnPfvadD26xWLBYLN95Px0REhKicOhiNB4iXV97H/auGyKzZs1i1qxZvPDCCzz88MM3pJABAwZQW1uL1WqltraW/v37A5fPMGpqajzr1dTUYLPZsNlsHD582NPudru577772l1fREQ6j1f3RB5++GE++ugjXnvtNYqLiz3/mWG32z3bFhcXM2nSpDbthmFQUVHBzTffjNVqZdy4cRw4cIDGxkYaGxs5cOAA48aNw2q1EhoaSkVFBYZhtNmXiIh0juueiVzxq1/9iurqau68804CAwMBPDezr2fp0qUcPnyY+vp64uLiWLRoEfPmzSM7O5vCwkIGDx7Mpk2bAJgwYQJlZWXEx8fTp08fnnjiCQDCw8NZuHAh06dPByArK4vw8HAAVq5cSU5ODufPnycuLo64uDhzvwURETHFYnjxcMXUqVPZs2dPp9+/8IXKysoOXYNPfnaBD6uR17K2+bsEEfFCe++dXl3OioqK4m9/+9sNL0pERLo3ry5n1dfX43A4GDVqFL169fK0P/fccz4rTEREuj6vQmTRokW+rkNERLohr0Lkvvvu83UdIiLSDXkVInfffbfnpvqFCxe4ePEiffr04aOPPvJpcSIi0rV5FSJHjhzx/GwYBi6Xi4qKCp8VJSIi3UOHZ/G1WCzcf//9HDhwwBf1iIhIN+LVmUhpaann50uXLnHs2DFCQkJ8VpSIiHQPXoXIW2+95fk5MDCQIUOG8Lvf/c5nRYmISPfgVYg8+eSTvq5DRES6Ia/uidTU1JCVlUVsbCyxsbEsWrSozQy6IiLSM3kVIjk5Odjtdvbv38/+/fuZOHEiOTk5vq5NRES6OK9C5PTp06SnpxMUFERQUBBpaWmcPn3a17WJiEgX51WIhIeHU1JSQmtrK62trZSUlHimYxcRkZ7LqxB54okneP311xk7dizjxo1j7969rF+/3te1iYhIF+fVt7M2b97Mhg0bCAsLA6ChoYENGzboW1siIj2cV2cin332mSdA4PLlrfb+aLuIiPQcXoXIpUuXaGxs9Cw3NDTQ2trqs6JERKR78Opy1s9//nNmzJjBlClTAHjjjTeYP3++TwsTEZGuz6sQSU1NZcSIEbz33nsAbN26leHDh/u0MBER6fq8ChGA4cOHKzhERKQNr0PkRvnv//5vlixZ4lmurq5m8eLFnD17lp07d9K/f38Ali5dyoQJEwDYvn07hYWFBAQE8PjjjzN+/HgAysvLWbduHZcuXSIjI4N58+Z1dndERHq0Tg+RYcOGUVJSAkBraytxcXHEx8dTVFTE7NmzmTNnTpv1jx8/jtPpxOl04na7yczMZO/evQDk5eXxhz/8AZvNxvTp07Hb7TpbEhHpRJ0eIt/07rvvEhkZyZAhQ9pdx+Vy4XA4CA4OJjIykqFDh3L06FEAhg4dSmRkJAAOhwOXy6UQERHpRH4NEafTSVJSkmd5x44dFBcXM2LECJYvX05YWBhut5vRo0d71rHZbLjdbgAiIiLatF8Jl+tpbm72+hmX6Ohob7si34GeORLpvvwWIi0tLfz5z3/m0UcfBWDmzJksXLgQi8XCM888w/r1633yRHxISIjCoYvReIh0fe192Ovw31i/UcrLy7nrrrsYOHAgAAMHDiQwMJCAgAAyMjL4+OOPgctnGN/82yVutxubzdZuu4iIdB6/hYjT6cThcHiWa2trPT/v27ePqKgoAOx2O06nk5aWFqqrq6mqqmLUqFGMHDmSqqoqqquraWlpwel0YrfbO70fIiI9mV8uZ3399dccPHiQvLw8T9tTTz3Fp59+CsCQIUM8r0VFRTF16lQSExMJDAwkNzeXwMBAAHJzc5k7dy6tra2kp6
d7gkdERDqHxTAMw99FdKbKysoOXYNPfnaBD6uR17K2+bsEEfFCe++dfrucJSIi3Z9CRERETFOIiIiIaQoRERExTSEiIiKmKURERMQ0hYiIiJimEBEREdMUIiIiYppCRERETFOIiIiIaQoRERExTSEiIiKmKURERMQ0hYiIiJimEBEREdMUIiIiYppCRERETFOIiIiIaQoRERExLchfB7bb7fTt25eAgAACAwMpKiqioaGBJUuW8OWXXzJkyBA2bdpEWFgYhmGwbt06ysrK6N27N+vXr+euu+4CYNeuXWzbtg2ABQsW8MADD/irSyIiPY5fz0Sef/55SkpKKCoqAiA/P5/Y2FhKS0uJjY0lPz8fgPLycqqqqigtLWXNmjWsWrUKgIaGBrZu3crOnTt55ZVX2Lp1K42Njf7qjohIj9OlLme5XC5SU1MBSE1NZd++fW3aLRYLMTExnDlzhtraWg4cOMDYsWMJDw8nLCyMsWPHsn//fn92QUSkR/FriMyZM4e0tDRefvllAOrq6rBarQAMGjSIuro6ANxuNxEREZ7tIiIicLvdV7XbbDbcbncn9kBEpGfz2z2RF198EZvNRl1dHZmZmQwbNqzN6xaLBYvFcsOP29zcTGVlpVfrRkdH3/Djy9W8HQ8R6Xr8FiI2mw2AAQMGEB8fz9GjRxkwYAC1tbVYrVZqa2vp37+/Z92amhrPtjU1NdhsNmw2G4cPH/a0u91u7rvvvuseNyQkROHQxWg8RLq+9j7s+eVy1tdff01TU5Pn53feeYeoqCjsdjvFxcUAFBcXM2nSJABPu2EYVFRUcPPNN2O1Whk3bhwHDhygsbGRxsZGDhw4wLhx4/zRJRGRHskvZyJ1dXVkZWUB0NraSlJSEnFxcYwcOZLs7GwKCwsZPHgwmzZtAmDChAmUlZURHx9Pnz59eOKJJwAIDw9n4cKFTJ8+HYCsrCzCw8P90SURkR7JYhiG4e8iOlNlZWWHLp8kP7vAh9XIa1nb/F2CiHihvffOLvUVXxER6V4UIiIiYppCRERETFOIiIiIaQoRERExTSEiIiKmKURERMQ0hYiIiJimEBEREdMUIiIiYppCRERETFOIiIiIaQoRERExTSEiIiKmKURERMQ0hYiIiJimEBEREdMUIiIiYppCRERETFOIiIiIaQoR+d662HrB3yV87+l3LEGdfcC//vWvLFu2jLq6OiwWCw8++CCzZs1iy5Yt7Ny5k/79+wOwdOlSJkyYAMD27dspLCwkICCAxx9/nPHjxwNQXl7OunXruHTpEhkZGcybN6+zuyNdWFBgL55wLvV3Gd9rKxz/298liJ91eogEBgayfPly7rrrLpqamkhPT2fs2LEAzJ49mzlz5rRZ//jx4zidTpxOJ263m8zMTPbu3QtAXl4ef/jDH7DZbEyfPh273c7w4cM7u0siIj1Wp4eI1WrFarUCEBoayrBhw3C73e2u73K5cDgcBAcHExkZydChQzl69CgAQ4cOJTIyEgCHw4HL5VKIiIh0ok4PkW86deoUlZWVjB49mo8++ogdO3ZQXFzMiBEjWL58OWFhYbjdbkaPHu3ZxmazeUInIiKiTfuVcLme5uZmKisrvaovOjq6gz0SM7wdj47S+HUOX42fdA9+C5GvvvqKxYsXs2LFCkJDQ5k5cyYLFy7EYrHwzDPPsH79ep588skbftyQkBC9uXQxGo/uTePXM7T3YcEv3866cOECixcvJjk5mYSEBAAGDhxIYGAgAQEBZGRk8PHHHwOXzzBqamo827rdbmw2W7vtItL9tV7St7587Ub9jjv9TMQwDB577DGGDRtGZmamp722ttZzr2Tfvn1ERUUBYLfbefTRR8nMzMTtdlNVVcWoUaMwDIOqqiqqq6ux2Ww4nU42btzY2d0RER8IDOhF6f9Z5u8yvtcSRv/mhuyn00Pkww8/pKSkhDvuuIOUlBTg8td5d+/ezaeffgrAkCFDyMvLAyAqKoqpU6eSmJhIYGAgub
m5BAYGApCbm8vcuXNpbW0lPT3dEzwiItI5Oj1ExowZw2effXZV+5VnQq5lwYIFLFiw4JrbXG87ERHxLT2xLiIipilERETENIWIiIiYphARERHTFCIiImKaQkRERExTiIiIiGkKERERMU0hIiIipilERETENIWIiIiYphARERHTFCIiImKaQkRERExTiIiIiGkKERERMU0hIiIipilERETENIWIiIiYphARERHTFCIiImJatw+R8vJyJk+eTHx8PPn5+f4uR0SkR+nWIdLa2kpeXh4FBQU4nU52797N8ePH/V2WiEiP0a1D5OjRowwdOpTIyEiCg4NxOBy4XC5/lyUi0mME+buA78LtdhMREeFZttlsHD169LrbNDc3U1lZ6fUxfmNfbLo++XYdGQszHhj2v3y6/57Ol+MXGZzps31Lx8euubn5mu3dOkTMiImJ8XcJIiLfG936cpbNZqOmpsaz7Ha7sdlsfqxIRKRn6dYhMnLkSKqqqqiurqalpQWn04ndbvd3WSIiPUa3vpwVFBREbm4uc+fOpbW1lfT0dKKiovxdlohIj2ExDMPwdxEiItI9devLWSIi4l8KERERMU0h0gVER0eTkpJCUlISixcv5ty5cx3a3u12s3jx5edZKisrKSsr87zmcrk0HUwn+OEPf8j69es9y7///e/ZsmWLqX2dOXOGHTt2mNrWbrdz+vRpU9v2FDdyrK7nueeea7P805/+9IYfoytQiHQBvXv3pqSkhN27d9OrVy9eeumlDm1vs9nYvHkzcHWITJo0iXnz5t3QeuVqwcHBlJaW3pA38DNnzvDiiy9e87WLFy9+5/33dDdyrK5n+/btbZY7+u+6u1CIdDFjxozhiy++oKGhgYULF5KcnMyDDz7Ip59+CsDhw4dJSUkhJSWF1NRUmpqaOHXqFElJSbS0tLB582b27NlDSkoKe/bsoaioiLy8PM6ePcvEiRO5dOkSAF9//TUTJkzgwoULnDx5kjlz5pCWlsZDDz3E559/7s9fQbcUFBTEjBkzeP7556967fTp0yxatIj09HTS09P58MMPAdiyZQu///3vPeslJSVx6tQpNm7cyMmTJ0lJSWHDhg0cOnSIhx56iPnz5+NwOABYuHAhaWlpOBwOXn755c7p5PeEmbE6ffo0mZmZOBwOHnvsMSZOnOgJoWuNxdNPP8358+dJSUnh0UcfBeDuu+8GYMmSJbz99tueYy5fvpw33niD1tZWNmzYQHp6OsnJyd0ndAzxu5iYGMMwDOPChQvG/PnzjR07dhh5eXnGli1bDMMwjIMHDxrTpk0zDMMw/vmf/9n44IMPDMMwjKamJuPChQtGdXW14XA4DMMwjD/96U/G6tWrPfv+5vL8+fONd9991zAMw3A6ncaKFSsMwzCMRx55xDhx4oRhGIZRUVFhPPzwwz7u8fdPTEyMcfbsWWPixInGmTNnjIKCAmPz5s2GYRjG0qVLjffff98wDMP48ssvjSlTphiGYRibN282CgoKPPtwOBxGdXV1m/E0DMN47733jNGjRxsnT570tNXX1xuGYRjnzp0zHA6Hcfr0acMwDGPixIlGXV2dbzvbzZkZq9WrVxvPPfecYRiGUVZWZtxxxx2e33N7Y3Hl3/U3j2sYhlFaWmosW7bMMAzDaG5uNuLi4oxz584ZL730kvHss8962h944IE2Y95VdevnRL4vrnxigctnItOnT+fBBx/0XKeNjY2loaGBpqYm7rnnHtavX09ycjIJCQn07dvX6+MkJiayZ88efvKTn+B0OnnooYf46quvOHLkCL/85S8967W0tNzYDvYQoaGhpKSk8B//8R/07t3b037w4ME2s0s3NTXx1VdfdWjfI0eOJDIy0rP8wgsv8OabbwLw17/+lS+++IJ+/fp9xx70HB0dqw8//JCtW7cCEBcXR1hYmGedjo5FXFwc69ato6WlhfLycsaMGUPv3r155513+Oyzz9i7dy8AZ8+e5Ysvvmgz7l2RQq
QLuHJPxBvz5s1jwoQJlJWVMXPmTAoKCggJCfFqW7vdzm9/+1saGhr45JNP+MlPfsK5c+e45ZZbvD6+XN+sWbNIS0sjLS3N03bp0iV27tx51TgFBgZ6Li9C+xPcAdx0002enw8dOsTBgwd5+eWX6dOnDw8//PB1t5Vr68hYtcfMWISEhHDfffexf/9+Xn/9dRITEwEwDIPHH3+c8ePHm++UH+ieSBc1ZswYXn31VeDy/6j9+vUjNDSUkydP8sMf/pB58+YxcuRITpw40Wa7vn37tvspt2/fvowYMYJ169bxT//0TwQGBhIaGsqtt97K66+/Dlz+H/nK/RfpuPDwcKZMmUJhYaGnbdy4cbzwwgue5Suzpw4ZMoS//OUvAHzyySecOnUKuP4YwuVPqGFhYfTp04fPP/+ciooKX3Tle68jY3XPPfd4/o0cOHCAxsZG4PpjERQUxIULF6557MTERIqKivjggw88oTFu3DhefPFFzzYnTpzg66+/voE99g2FSBf1i1/8gk8++YTk5GQ2btzo+Uri888/T1JSEsnJyQQFBREXF9dmux//+MccP37cc2P97yUmJvLqq696Pv0APPXUUxQWFjJt2jQcDgf79u3zbee+537+859TX1/vWX7sscc4duwYycnJJCYmer55NXnyZBobG3E4HPzxj3/k9ttvB6Bfv37cc889JCUlsWHDhqv2HxcXx8WLF5k6dSobN27UzNTfgbdj9Ytf/IJ33nmHpKQk3njjDQYNGkRoaOh1x+LBBx9k2rRpnhvr3zR27Fjef/99/vEf/5Hg4GAAMjIyGD58OGlpaSQlJZGbm0tra6uPfwPfnaY9ERH5Fi0tLQQEBBAUFMSRI0dYtWqVLgH/P7onIiLyLf7nf/6H7OxsLl26RK9evVizZo2/S+oydCYiIiKm6Z6IiIiYphARERHTFCIiImKaQkRERExTiEiP1ZEp+K9MZNkRH3/8MWvXrgUuPzD60Ucfmapz+fLl2O12UlJSmDZtGu++++63btNTpiEX/1OISI/1Xafgv56LFy8ycuRIHn/8ceDy7MtHjhwxvb9ly5ZRUlLCihUrWLly5beu31OmIRf/03MiIlyeZuazzz6joaGBFStWUF1dTZ8+fcjLy+POO+9ss9+11ioAAANvSURBVO6f//xntm3bxoULFwgPD+fpp59m4MCBbNmyhZMnT1JdXc3gwYOZMWMG//Zv/8avf/1rXnrpJQICAnj11Vf59a9/zbJly9i7dy+9evWiqamJadOmeZav5+6778btdnuWFy5cSE1NDc3NzTzyyCPMmDGjzTTkw4cPZ+PGjdx9990cOXKEQ4cOsXXrVvr168d//dd/cdddd/H0009jsVgoKyvjySef5KabbuKee+6hurr6qjAS+XsKEenxLl68SHl5OePHj2fLli38wz/8A7/73e949913+dd//dernkz+0Y9+xM6dO7FYLLzyyisUFBSwfPlyAD7//HP+8z//k969e3Po0CEAbr31Vn76059y0003MWfOHODy9DRlZWXcf//9OJ1OEhISvjVAAPbv38/999/vWX7iiScIDw/n/PnzTJ8+nYSEBP7lX/6FHTt2tPtE9V/+8hecTidWq5WZM2fy4YcfMnLkSHJzc/njH/9IZGQkS5cuNfW7lJ5HISI9Vkem4P+mmpoalixZwt/+9jdaWlq49dZbPa/Z7fY2U4u3Z/r06RQUFHD//fdTVFT0rU9A/+Y3v+G3v/0tNTU1bf4IlZkp4UeNGkVERAQAd955J19++SV9+/YlMjLSM+24w+Fg586d39oPEYWI9FgdmYL/m9auXcvs2bOZNGmS5/LQFX369PFqHz/60Y9YvXo1hw4dorW1lTvuuOO66y9btowpU6bwwgsvsGLFCoqKikxPCX9lwj+4PB19d5jkT7ou3VgX+Yb2puD/prNnz2Kz2QAoLi72ar/Xmt49NTWVRx99tM3fs/g2P/vZz7h06RL79+83PQ35tfzgBz
+gurraMx39tWaAFrkWhYjIN7Q3Bf/fr/PLX/6StLQ0wsPDvdrvxIkTefPNN0lJSeGDDz4AIDk5mTNnzpCUlOR1fRaLhQULFlBQUGB6GvJr6d27NytXrmTu3LmkpaXRt2/fq8JT5Fo0AaOIn7zxxhu4XC6eeuopf5cCwFdffUXfvn0xDIPVq1dz++23M3v2bH+XJV2c7omI+MGaNWsoLy8nPz/f36V4vPLKK+zatYsLFy4QHR3NjBkz/F2SdAM6ExHpIlavXn3VU+2PPPII6enpfqpI5NspRERExDTdWBcREdMUIiIiYppCRERETFOIiIiIaf8XBFWByLoBXVwAAAAASUVORK5CYII=\n",
            "text/plain": [
              "<Figure size 432x288 with 1 Axes>"
            ]
          },
          "metadata": {
            "tags": []
          }
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "l2ppN_Gj3MXZ"
      },
      "source": [
        "**Data Preprocessing**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "KMryD2Ok3PbF"
      },
      "source": [
        "# Split the reviews by sentiment label. Only the first 8000 positive rows are\n",
        "# kept; the neutral and negative subsets are taken in full.\n",
        "df_Positive = df.loc[df['Polarity_Rating'].eq('Positive')].iloc[:8000]\n",
        "df_Neutral = df.loc[df['Polarity_Rating'].eq('Neutral')]\n",
        "df_Negative = df.loc[df['Polarity_Rating'].eq('Negative')]"
      ],
      "execution_count": 14,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "mUZim1eA3qDF"
      },
      "source": [
        "**Resample the negative and neutral reviews to 8000 rows each and create the final dataframe**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "9PqqFadV4Qaf"
      },
      "source": [
        "# Draw 8000 rows (with replacement) from each of the neutral and negative subsets\n",
        "# and stack them under the positive subset. A fixed random_state makes the draw,\n",
        "# and every metric computed from it, repeatable across runs; 42 matches the seed\n",
        "# passed to train_test_split further down.\n",
        "# NOTE(review): sampling with replacement happens before the train/test split, so\n",
        "# copies of the same review can land in both sets and inflate the test score.\n",
        "df_Neutral_over = df_Neutral.sample(8000, replace=True, random_state=42)\n",
        "df_Negative_over = df_Negative.sample(8000, replace=True, random_state=42)\n",
        "df = pd.concat([df_Positive, df_Neutral_over, df_Negative_over], axis=0)"
      ],
      "execution_count": 15,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "B3so3XNt5PXg"
      },
      "source": [
        "**Text Preprocessing**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "mvi7pt-d5R6E"
      },
      "source": [
        "# Built once instead of inside every call: the stop-word list used to be\n",
        "# re-created per review and searched as a list for each word. Frozensets give\n",
        "# constant-time membership tests and leave the output unchanged.\n",
        "_STOP_WORDS = frozenset(stopwords.words('english'))\n",
        "_PUNCTUATION = frozenset(string.punctuation)\n",
        "\n",
        "\n",
        "def get_text_processing(text):\n",
        "    \"\"\"Strip punctuation and English stop words from a review.\n",
        "\n",
        "    Args:\n",
        "        text: Review text; iterated character by character.\n",
        "\n",
        "    Returns:\n",
        "        The remaining words joined by single spaces. Original casing is kept;\n",
        "        only the stop-word comparison is case-insensitive.\n",
        "    \"\"\"\n",
        "    no_punctuation = ''.join(char for char in text if char not in _PUNCTUATION)\n",
        "    # NOTE(review): punctuation is removed before the stop-word check, so a\n",
        "    # contraction such as don't becomes dont; presumably it then no longer\n",
        "    # matches an apostrophe-bearing stop word -- confirm this is intended.\n",
        "    return ' '.join(\n",
        "        word for word in no_punctuation.split() if word.lower() not in _STOP_WORDS\n",
        "    )"
      ],
      "execution_count": 16,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "1F75BGMk5zbT"
      },
      "source": [
        "**Apply the `get_text_processing` function to the `Review Text` column**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 306
        },
        "id": "52_Rbc3p5953",
        "outputId": "5e231d9d-8ea7-4821-d0a6-e8f956efb97f"
      },
      "source": [
        "# Store the cleaned text in a 'review' column alongside the raw 'Review Text'.\n",
        "df = df.assign(review=df['Review Text'].map(get_text_processing))\n",
        "df.head()"
      ],
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Clothing ID</th>\n",
              "      <th>Age</th>\n",
              "      <th>Review Text</th>\n",
              "      <th>Rating</th>\n",
              "      <th>Recommended IND</th>\n",
              "      <th>Division Name</th>\n",
              "      <th>Department Name</th>\n",
              "      <th>Class Name</th>\n",
              "      <th>Polarity_Rating</th>\n",
              "      <th>review</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>767</td>\n",
              "      <td>33</td>\n",
              "      <td>Absolutely wonderful - silky and sexy and comf...</td>\n",
              "      <td>4</td>\n",
              "      <td>1</td>\n",
              "      <td>Initmates</td>\n",
              "      <td>Intimate</td>\n",
              "      <td>Intimates</td>\n",
              "      <td>Positive</td>\n",
              "      <td>Absolutely wonderful silky sexy comfortable</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1080</td>\n",
              "      <td>34</td>\n",
              "      <td>Love this dress!  it's sooo pretty.  i happene...</td>\n",
              "      <td>5</td>\n",
              "      <td>1</td>\n",
              "      <td>General</td>\n",
              "      <td>Dresses</td>\n",
              "      <td>Dresses</td>\n",
              "      <td>Positive</td>\n",
              "      <td>Love dress sooo pretty happened find store im ...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>1049</td>\n",
              "      <td>50</td>\n",
              "      <td>I love, love, love this jumpsuit. it's fun, fl...</td>\n",
              "      <td>5</td>\n",
              "      <td>1</td>\n",
              "      <td>General Petite</td>\n",
              "      <td>Bottoms</td>\n",
              "      <td>Pants</td>\n",
              "      <td>Positive</td>\n",
              "      <td>love love love jumpsuit fun flirty fabulous ev...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>847</td>\n",
              "      <td>47</td>\n",
              "      <td>This shirt is very flattering to all due to th...</td>\n",
              "      <td>5</td>\n",
              "      <td>1</td>\n",
              "      <td>General</td>\n",
              "      <td>Tops</td>\n",
              "      <td>Blouses</td>\n",
              "      <td>Positive</td>\n",
              "      <td>shirt flattering due adjustable front tie perf...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>858</td>\n",
              "      <td>39</td>\n",
              "      <td>I aded this in my basket at hte last mintue to...</td>\n",
              "      <td>5</td>\n",
              "      <td>1</td>\n",
              "      <td>General Petite</td>\n",
              "      <td>Tops</td>\n",
              "      <td>Knits</td>\n",
              "      <td>Positive</td>\n",
              "      <td>aded basket hte last mintue see would look lik...</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   Clothing ID  ...                                             review\n",
              "0          767  ...        Absolutely wonderful silky sexy comfortable\n",
              "1         1080  ...  Love dress sooo pretty happened find store im ...\n",
              "3         1049  ...  love love love jumpsuit fun flirty fabulous ev...\n",
              "4          847  ...  shirt flattering due adjustable front tie perf...\n",
              "6          858  ...  aded basket hte last mintue see would look lik...\n",
              "\n",
              "[5 rows x 10 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 17
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Fhxnn-pj7R6P"
      },
      "source": [
        "**Select and preview the review text with its polarity rating**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 202
        },
        "id": "yFeqv9LG7N5N",
        "outputId": "a06a4655-55ed-4748-e2c4-90d8820c70b1"
      },
      "source": [
        "# Keep just the cleaned text and its sentiment label; every other column is dropped.\n",
        "df = df.loc[:, ['review', 'Polarity_Rating']]\n",
        "df.head()"
      ],
      "execution_count": 18,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>review</th>\n",
              "      <th>Polarity_Rating</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Absolutely wonderful silky sexy comfortable</td>\n",
              "      <td>Positive</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>Love dress sooo pretty happened find store im ...</td>\n",
              "      <td>Positive</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>love love love jumpsuit fun flirty fabulous ev...</td>\n",
              "      <td>Positive</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>shirt flattering due adjustable front tie perf...</td>\n",
              "      <td>Positive</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>aded basket hte last mintue see would look lik...</td>\n",
              "      <td>Positive</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "                                              review Polarity_Rating\n",
              "0        Absolutely wonderful silky sexy comfortable        Positive\n",
              "1  Love dress sooo pretty happened find store im ...        Positive\n",
              "3  love love love jumpsuit fun flirty fabulous ev...        Positive\n",
              "4  shirt flattering due adjustable front tie perf...        Positive\n",
              "6  aded basket hte last mintue see would look lik...        Positive"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 18
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "rwpsuZPk8HCS"
      },
      "source": [
        "**Apply one-hot encoding to the negative, neutral, and positive labels**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 202
        },
        "id": "lHrXiYDQ8Pj6",
        "outputId": "a80ce8f8-e4f3-4a6a-9f39-0f95d17d0007"
      },
      "source": [
        "# Replace the single label column with one indicator column per sentiment class.\n",
        "one_hot = pd.get_dummies(df['Polarity_Rating'])\n",
        "df = pd.concat([df.drop(columns='Polarity_Rating'), one_hot], axis=1)\n",
        "df.head()"
      ],
      "execution_count": 19,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>review</th>\n",
              "      <th>Negative</th>\n",
              "      <th>Neutral</th>\n",
              "      <th>Positive</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Absolutely wonderful silky sexy comfortable</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>Love dress sooo pretty happened find store im ...</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>love love love jumpsuit fun flirty fabulous ev...</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>shirt flattering due adjustable front tie perf...</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>aded basket hte last mintue see would look lik...</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "                                              review  ...  Positive\n",
              "0        Absolutely wonderful silky sexy comfortable  ...         1\n",
              "1  Love dress sooo pretty happened find store im ...  ...         1\n",
              "3  love love love jumpsuit fun flirty fabulous ev...  ...         1\n",
              "4  shirt flattering due adjustable front tie perf...  ...         1\n",
              "6  aded basket hte last mintue see would look lik...  ...         1\n",
              "\n",
              "[5 rows x 4 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 19
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "O63qBfKuE2nu"
      },
      "source": [
        "**Apply Train Test Split**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "6PNWLYhME7Pf"
      },
      "source": [
        "# Features are the cleaned review strings; targets are all remaining columns.\n",
        "X = df['review'].to_numpy()\n",
        "y = df.drop(columns='review').to_numpy()\n",
        "# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.\n",
        "X_train, X_test, y_train, y_test = train_test_split(\n",
        "    X, y, test_size=0.30, random_state=42\n",
        ")"
      ],
      "execution_count": 20,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "AuADdT8UFgOy"
      },
      "source": [
        "**Apply vectorization**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "r20vXgZ6Fvqw"
      },
      "source": [
        "# Learn the vocabulary from the training reviews only, then encode both splits\n",
        "# as token-count matrices using that same vocabulary.\n",
        "vect = CountVectorizer()\n",
        "X_train, X_test = vect.fit_transform(X_train), vect.transform(X_test)"
      ],
      "execution_count": 21,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "akiDMHsvGNxD"
      },
      "source": [
        "**Apply term frequency-inverse document frequency (TF-IDF):**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "_q3-ppruGRz7"
      },
      "source": [
        "# Re-weight the raw counts with TF-IDF (fitted on the training split only) and\n",
        "# convert both results to dense arrays before they are passed to the model.\n",
        "tfidf = TfidfTransformer()\n",
        "X_train = tfidf.fit_transform(X_train).toarray()\n",
        "X_test = tfidf.transform(X_test).toarray()"
      ],
      "execution_count": 22,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "rXV_Xf5kHB73"
      },
      "source": [
        "**Define the network layers, compile the model, and set up early stopping**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "-Su4eu41HAUT"
      },
      "source": [
        "# Fully connected classifier: three ReLU hidden layers, each followed by 50%\n",
        "# dropout, and a 3-unit softmax output (one unit per sentiment class).\n",
        "# NOTE(review): 12673 presumably mirrors the vectorizer's vocabulary size -- confirm.\n",
        "model = Sequential([\n",
        "    Dense(units=12673, activation='relu'),\n",
        "    Dropout(0.5),\n",
        "    Dense(units=4000, activation='relu'),\n",
        "    Dropout(0.5),\n",
        "    Dense(units=500, activation='relu'),\n",
        "    Dropout(0.5),\n",
        "    Dense(units=3, activation='softmax'),\n",
        "])\n",
        "\n",
        "opt = tf.keras.optimizers.Adam(learning_rate=0.001)\n",
        "model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])\n",
        "\n",
        "# Stop training once validation loss has gone 2 epochs without improving.\n",
        "early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=2)"
      ],
      "execution_count": 23,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "NYGn2m1lIFvo"
      },
      "source": [
        "**Fit the Model**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "NtKRqIcYIEev",
        "outputId": "0de54d2f-bc01-4606-e290-1f0b216baab5"
      },
      "source": [
        "# Keras documents `callbacks` as a list of Callback instances, so early_stop is\n",
        "# wrapped in a list instead of being passed bare.\n",
        "# NOTE(review): the test split doubles as validation data here, so early stopping\n",
        "# is tuned on the same rows that are later reported as test accuracy.\n",
        "model.fit(\n",
        "    x=X_train,\n",
        "    y=y_train,\n",
        "    batch_size=256,\n",
        "    epochs=100,\n",
        "    validation_data=(X_test, y_test),\n",
        "    verbose=1,\n",
        "    callbacks=[early_stop],\n",
        ")"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Epoch 1/100\n",
            "58/66 [=========================>....] - ETA: 41s - loss: 0.6450 - accuracy: 0.7124"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "mOSbaNrqJBeP"
      },
      "source": [
        "**Evaluation of Model**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ZTsIgolQJD2M"
      },
      "source": [
        "# The model was compiled with metrics=['accuracy'], so index 1 of the result\n",
        "# is the accuracy and index 0 is the loss.\n",
        "model_score = model.evaluate(x=X_test, y=y_test, batch_size=64, verbose=1)\n",
        "test_accuracy = model_score[1]\n",
        "print('Test accuracy:', test_accuracy)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "s6c7yRFKJUK4"
      },
      "source": [
        "**Prediction**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Q34Yb_0pJW3r"
      },
      "source": [
        "# Softmax outputs for every test review: one row per review, one column per class.\n",
        "preds = model.predict(x=X_test)\n",
        "preds"
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}